const axios = require('axios');
const xmlParse = require('fast-xml-parser').parse;
let config = require('../config');
let _ = require('lodash');
let sequelize = require('../libs/db.lib');
let moment = require('moment');
let Segment = require('segment');
let pinyin4js = require('pinyin4js');
let schedule = require('node-schedule');
let siteModel = require('../models/site.model');
let videoModel = require('../models/video.model');

// Chinese word-segmentation module (used by genTags to split names into searchable words)
let segment = new Segment();
// Load the library's default recognition modules and dictionaries
segment.useDefault();

/**
 * Pause asynchronous execution for a fixed delay.
 * @param {number} [time=0] delay in milliseconds handed to setTimeout
 * @returns {Promise<void>} promise that resolves (without a value) once the timer fires
 */
function sleep(time = 0) {
    return new Promise((done) => {
        setTimeout(() => done(), time);
    });
}

// Scheduled collection: the recurrence rule below fires at minute 5 of every hour.
// Each run asks every stored site for hour = 24, starting at page 0
// (presumably "updated within the last 24 hours" on the source API — confirm).
const rule = new schedule.RecurrenceRule();
rule.minute = [5];
schedule.scheduleJob(rule, async function () {
    // Do nothing unless automatic collection is switched on in the config.
    if (!config.system.autoCJ) {
        return;
    }
    console.log('执行定时采集...');
    const sites = await siteModel.findAll({attributes: ['flag']}).catch(e => {
        console.error(e);
    });
    // A failed lookup has already been logged; skip this run.
    if (!sites) {
        return;
    }
    // for...of visits only the rows; for...in would also walk extra enumerable keys on the array.
    for (const site of sites) {
        await cjSiteIter(site.flag, 24, 0);
    }
});

/**
 * Run a full collection (hour = 0) for every site stored in the database.
 *
 * Sites are processed strictly one after another. When a site row carries saved
 * progress in `cj.pg`, collection starts from that page; otherwise it starts from page 0.
 *
 * @returns {Promise<null|undefined>} null once every site was processed, undefined
 *     when the site list could not be loaded (the error is logged)
 */
async function cjAllSite() {
    console.log('执行采集所有...');
    const sites = await siteModel.findAll({attributes: ['flag', 'cj']}).catch(e => {
        console.error(e);
    });
    if (!sites) {
        return undefined;
    }
    // for...of visits only the rows; for...in would also walk extra enumerable keys on the array.
    for (const site of sites) {
        const cjInfo = site.cj;
        const startPage = cjInfo && cjInfo.pg ? cjInfo.pg : 0;
        await cjSiteIter(site.flag, 0, startPage);
    }
    return null;
}

/**
 * Collect a site page by page (iteratively, not recursively).
 *
 * Runs siteCJTask for the requested page and keeps requesting the following page
 * while the last result reports `page < pageCount`, pausing 3 seconds between
 * requests. A failed task is logged and ends the iteration; the returned promise
 * still resolves.
 *
 * @param flag site identifier passed through to siteCJTask
 * @param hour hour value passed through to siteCJTask
 * @param page first page to collect
 * @returns {Promise<void>}
 */
async function cjSiteIter(flag, hour, page) {
    const logFailure = (e) => {
        console.error(e);
    };
    let current = await siteCJTask(flag, hour, page).catch(logFailure);
    while (current != null && current.page < current.pageCount) {
        // Wait 3 seconds before collecting the next page
        await sleep(3000);
        current = await siteCJTask(flag, hour, current.page + 1).catch(logFailure);
    }
}

/**
 * Await an HTTP request, parse its XML body and hand the parsed tree to a mapper.
 *
 * Attributes are kept and exposed with a `_` prefix, text nodes are exposed as
 * `_val`, and attribute values are type-converted by the parser.
 *
 * @param promise pending request whose result exposes the XML text as `data`
 * @param parseJson function that receives the parsed tree and returns the final value
 * @returns {Promise<any>} resolves with whatever parseJson returns; rejects (after
 *     logging) when the request fails or when parsing / mapping throws
 */
function requestXML2Json(promise, parseJson) {
    return new Promise((resolve, reject) => {
        const fail = (error) => {
            console.error(error);
            reject(error);
        };
        promise.then((response) => {
            let mapped;
            try {
                const tree = xmlParse(response.data, {
                    attributeNamePrefix: '_',
                    textNodeName: '_val',
                    ignoreAttributes: false,
                    parseAttributeValue: true,
                });
                mapped = parseJson(tree);
            } catch (error) {
                fail(error);
                return;
            }
            resolve(mapped);
        }).catch(fail);
    });
}

/**
 * Normalise a raw field value to a trimmed string.
 * @param content raw value taken from the parsed feed
 * @param {string} [def=''] value returned when content is null/undefined or a blank string
 * @returns {string} the trimmed string form of content, or def
 */
function fitContent(content, def = '') {
    const isBlankString = _.isString(content) && content.trim().length === 0;
    return (_.isNil(content) || isBlankString) ? def : _.toString(content).trim();
}

/**
 * Build cumulative concatenations that all start at the first element, to help searching.
 *
 * For input [a, b, c] the result is [a+b, a+b+c]. Only the first element is used
 * as a starting point. The limit check runs after each push, so at most
 * `limit + 1` combinations are produced.
 *
 * @param input list of word fragments
 * @param limit cap applied to the combination loop (default 99)
 * @returns {Array} the accumulated combinations (empty for fewer than two elements)
 */
function tagCombine(input, limit = 99) {
    const combos = [];
    if (!(0 < input.length)) {
        return combos;
    }
    let joined = input[0];
    let next = 1;
    while (next < input.length) {
        joined += input[next];
        combos.push(joined);
        if (next > limit) {
            break;
        }
        next++;
    }
    return combos;
}

/**
 * Generate the space-separated search tags for a title.
 *
 * The tag list contains, without duplicates and in this order: the cleaned name,
 * its pinyin initials (as returned and upper-cased), the segmented words of the
 * name with their cumulative combinations, and the segmented pinyin with its
 * combinations (limit 5). The joined string is cut to 1024 characters.
 *
 * @param content raw title
 * @returns {string} the tag string, or '' when the cleaned title is empty
 */
function genTags(content) {
    const stripped = removeHtmlTag(fitContent(content));
    if (stripped.length === 0) {
        return '';
    }
    // Removing markup can leave surrounding whitespace behind, hence the second trim.
    const name = stripped.toString().trim();
    const segmentOptions = () => ({
        stripPunctuation: true,
        simple: true,
    });
    // Pinyin initials
    const initials = pinyin4js.convertToPinyinString(name, '', pinyin4js.FIRST_LETTER);
    // Word segmentation of the name itself
    const nameWords = segment.doSegment(name, segmentOptions());
    // Full pinyin, comma separated, then segmented the same way
    const pinyin = pinyin4js.convertToPinyinString(name, ',', pinyin4js.WITHOUT_TONE);
    const pinyinWords = segment.doSegment(pinyin, segmentOptions());
    const tags = _.union(
        [name, initials, initials.toUpperCase()],
        nameWords,
        tagCombine(nameWords),
        pinyinWords,
        tagCombine(pinyinWords, 5)
    ).join(' ');
    return tags.length > 1024 ? tags.slice(0, 1024) : tags;
}

/**
 * Normalise a "last updated" value to a numeric timestamp.
 *
 * - A usable number is returned unchanged (its unit is whatever the feed supplied).
 * - Anything else is converted to a trimmed string and parsed with moment; the
 *   result is the moment's `valueOf()` (epoch milliseconds).
 * - Missing, blank or unparseable input falls back to the current time. moment
 *   does not throw on unparseable input, it returns an invalid moment whose
 *   `valueOf()` is NaN, so validity is checked explicitly.
 *
 * @param content raw value taken from the parsed feed
 * @returns {number} timestamp, never NaN
 */
function fitTime(content) {
    if (_.isNumber(content) && !Number.isNaN(content)) {
        return content;
    }
    const text = _.toString(content).trim();
    if (text.length === 0) {
        console.error(content);
        return moment().valueOf();
    }
    try {
        const parsed = moment(text);
        if (parsed.isValid()) {
            return parsed.valueOf();
        }
        // Unparseable date text: log the offending value and fall back to "now".
        console.error(content);
    } catch (e) {
        console.log(e);
    }
    return moment().valueOf();
}

/**
 * Normalise a release-year value to an integer.
 * @param content raw value taken from the parsed feed
 * @returns {Number} 0 when the value is missing, blank or the literal "0";
 *     otherwise the result of lodash's toInteger on the raw value
 */
function fitYear(content) {
    // toString maps null/undefined to '', so one trimmed string covers every "empty" case.
    const text = _.toString(content).trim();
    if (text.length === 0 || text === '0') {
        return 0;
    }
    try {
        return _.toInteger(content);
    } catch (e) {
        console.log(e);
        return 0;
    }
}

/**
 * Strip `&nbsp;` entities and anything shaped like `<...>` from a string.
 * @param {string} content text that may contain markup
 * @returns {string} the text with every match removed (case-insensitive, all occurrences)
 */
function removeHtmlTag(content) {
    return content.replace(/(&nbsp;|<([^>]+)>)/ig, '');
}

/**
 * Normalise a people list (actors or directors) to a comma-separated string.
 *
 * The value is cleaned of markup, split on comma, slash, semicolon or space,
 * each entry is trimmed and empty entries are dropped. The literal "0" counts as
 * "no value".
 *
 * @param content raw value taken from the parsed feed
 * @returns {string} names joined with ',', or '' when nothing is left
 */
function fitActor(content) {
    const cleaned = removeHtmlTag(fitContent(content));
    if (_.toString(cleaned) === '0') {
        return '';
    }
    return cleaned
        .split(/[,/; ]/)
        .map((name) => name.trim())
        .filter((name) => name.length > 0)
        .join(',');
}

/**
 * Normalise a cover-image value.
 *
 * When `config.system.pic2tu.fixs` is set, the first configured marker found in
 * the value wins: everything up to and including that marker is cut off.
 *
 * @param pic raw value taken from the parsed feed
 * @returns {string} the cleaned value, or '' when it is missing or blank
 */
function fitPic(pic) {
    const url = fitContent(pic);
    if (url.length === 0) {
        return '';
    }
    const markers = config.system.pic2tu.fixs;
    if (!markers) {
        return url;
    }
    for (const marker of markers) {
        const at = url.indexOf(marker);
        if (at >= 0) {
            return url.slice(at + marker.length);
        }
    }
    return url;
}

/**
 * Parse one `dl.dd` node of a video into a play source with its link list.
 *
 * The node's text (`_val`) is a '#'-separated list of entries, each either
 * `name$url` or just `url`. Only the first two '$'-separated parts of an entry
 * are used. Entries without a name are named after their 1-based position in the
 * list; entries with an empty url are skipped but still count towards the position.
 *
 * `_flag` and `_val` are converted to strings first because the XML parser may
 * hand back a non-string (for example a number) for purely numeric content.
 *
 * @param dd parsed node exposing `_flag` (source name) and `_val` (link text)
 * @returns {{src: string, list: Array<{name: string, url: string}>}|null} null when
 *     the node, its flag or its text is missing or blank
 */
function fitDd(dd) {
    if (!dd || !dd._flag || !dd._val) {
        return null;
    }
    const src = String(dd._flag).trim();
    const rawLinks = String(dd._val);
    if (src.length === 0 || rawLinks.trim().length === 0) {
        return null;
    }
    const list = [];
    rawLinks.split('#').forEach((entry, position) => {
        let name = '';
        let url;
        if (entry.includes('$')) {
            const parts = entry.split('$');
            name = parts[0].trim();
            url = parts[1].trim();
        } else {
            url = entry.trim();
        }
        if (url.length === 0) {
            return;
        }
        list.push({
            name: name.length > 0 ? name : String(position + 1),
            url: url,
        });
    });

    return {
        src: src,
        list: list,
    };
}

/**
 * Convert one parsed `<video>` node of the feed into the record shape used for storage.
 *
 * `site` and `cid` are left null here; the caller fills them in. The playlist
 * maps each play-source name to its link list; sources that fitDd rejects are
 * left out.
 *
 * @param vod parsed video node
 * @returns {*} the normalised video record
 */
function parseVideo(vod) {
    const record = {
        // Source site
        site: null,
        // Ids on the source site: video id and category id
        oid: vod.id,
        ocid: vod.tid,
        // Bound local category
        cid: null,
        // Title
        name: removeHtmlTag(fitContent(vod.name)),
        // Search tags used for lookup
        tags: genTags(vod.name),
        // Last update time
        last: fitTime(vod.last),
        // Region
        area: removeHtmlTag(fitContent(vod.area)),
        // Year
        year: fitYear(vod.year),
        // Actors
        actor: fitActor(vod.actor),
        // Directors
        director: fitActor(vod.director),
        // Cover image
        pic: fitPic(vod.pic),
        // Description
        desc: removeHtmlTag(fitContent(vod.des)),
        // Playlist, filled in below
        playlist: {},
    };
    const sources = {};
    if (vod.dl && vod.dl.dd) {
        // The node is either a list of sources or a single source; treat both as a list.
        const nodes = Array.isArray(vod.dl.dd) ? vod.dl.dd : [vod.dl.dd];
        for (const node of nodes) {
            const play = fitDd(node);
            if (play) {
                sources[play.src] = play.list;
            }
        }
    }
    record.playlist = sources;
    return record;
}

/**
 * Fetch the video categories published by a collection site.
 * @param api url of the site's XML API (requested through the configured spider proxy)
 * @returns {Promise<any>} resolves with `rss.class.ty` of the parsed response
 */
async function siteClass(api) {
    const request = axios.get(api, {proxy: config.spiderProxy});
    const pickCategories = (json) => json.rss.class.ty;
    return requestXML2Json(request, pickCategories);
}

/**
 * Build the page descriptor (paging counters plus parsed videos) from one parsed list response.
 * @param json parsed XML tree of the list response
 * @returns {{page: number, pageCount: number, pageSize: number, recordCount: number, videos: Array}}
 */
function parsePageInfo(json) {
    const pageInfo = {
        page: 0,
        pageCount: 0,
        pageSize: 0,
        recordCount: 0,
        videos: [],
    };
    const list = json.rss.list;
    if (list._pagecount === 0) {
        return pageInfo;
    }
    pageInfo.page = list._page;
    pageInfo.pageCount = list._pagecount;
    pageInfo.pageSize = list._pagesize;
    pageInfo.recordCount = list._recordcount;
    if (!_.isNil(list.video)) {
        // A page holding exactly one <video> is parsed as a single object, not an array.
        const vods = _.isArray(list.video) ? list.video : [list.video];
        for (const vod of vods) {
            const video = parseVideo(vod);
            if (video) {
                pageInfo.videos.push(video);
            }
        }
    }
    return pageInfo;
}

/**
 * Decide whether a stored video with the same title is the same work as the collected one:
 * same category and (equal or overlapping actors, or equal or overlapping directors).
 * @param existing stored video row
 * @param vod collected video record
 * @returns {boolean}
 */
function isSameVideo(existing, vod) {
    if (!existing || existing.cid !== vod.cid) {
        return false;
    }
    // toString maps a null/undefined column to '' so a missing value cannot crash the split.
    const names = (value) => _.toString(value).split(',');
    return existing.actor === vod.actor
        || _.intersection(names(existing.actor), names(vod.actor)).length > 0
        || existing.director === vod.director
        || _.intersection(names(existing.director), names(vod.director)).length > 0;
}

/**
 * Merge the collected playlist, cover and update time into a stored row and save it.
 * A failed save is logged and does not interrupt the task.
 * @param existing stored video row
 * @param vod collected video record (its playlist becomes the merged one)
 */
async function mergeIntoExisting(existing, vod) {
    vod.playlist = mergePlaylist(existing.playlist, vod.playlist);
    existing.pic = vod.pic;
    existing.last = vod.last;
    existing.playlist = vod.playlist;
    await existing.save().catch(e => {
        console.error(e);
    });
}

/**
 * Store one collected video: skip it, update a matching row, or create a new row.
 * The outcome is recorded on `vod.status`.
 * @param s site row the video was collected from
 * @param vod collected video record
 */
async function storeVideo(s, vod) {
    vod.site = s.flag;
    const cid = s.binds ? s.binds[vod.ocid] : undefined;
    // Source category not bound to a local one: do not process
    if (_.isNil(cid)) {
        vod.status = '未绑定分类,不处理';
        return;
    }
    vod.cid = cid;

    // Same site and same source id: update the existing row
    const sameSource = await videoModel.findOne({attributes: ['id', 'playlist'], where: {site: s.flag, oid: vod.oid}}).catch(e => {
        console.error(e);
    });
    if (sameSource) {
        vod.status = '相同来源,更新';
        await mergeIntoExisting(sameSource, vod);
        return;
    }

    // Same title: only merge when category and cast/director also match, to keep
    // different works that share a title apart
    const sameName = await videoModel.findAll({attributes: ['id', 'name', 'cid', 'actor', 'director', 'playlist'], where: {name: vod.name}}).catch(e => {
        console.error(e);
    });
    const match = (sameName || []).find((candidate) => isSameVideo(candidate, vod));
    if (match) {
        vod.status = '相同片名,更新';
        await mergeIntoExisting(match, vod);
        return;
    }

    vod.status = '新增';
    await videoModel.create(vod).catch(e => {
        console.error(e);
    });
}

/**
 * Collection task: fetch one list page of a site and store its videos.
 *
 * For a full collection (hour === 0) the page number is first saved on the site
 * row as `cj.pg`, which cjAllSite reads to pick its start page.
 *
 * NOTE(review): the queries are not passed the transaction opened here, so they
 * presumably run outside of it; it is only opened, then committed or rolled back.
 * Confirm whether the queries were meant to be bound to it.
 *
 * @param flag unique site identifier
 * @param hour value sent to the source API as `h`
 * @param page page number sent to the source API as `pg`
 * @returns {Promise<*>} resolves with the page descriptor (`page`, `pageCount`,
 *     `pageSize`, `recordCount`, `videos`, each video carrying a `status`).
 *     Rejects with a message string when the site is unknown or disabled, and
 *     with the error's `message` for any other failure.
 */
async function siteCJTask(flag, hour, page) {
    let s;
    try {
        s = await siteModel.findOne({where: {flag: flag}});
    } catch (e) {
        console.error(e);
        return Promise.reject(e.message);
    }
    if (!s) {
        return Promise.reject(`唯一标识${flag}不存在！`);
    }
    if (!s.enable) {
        return Promise.reject(`站点${flag} - ${s.name} 未启用！`);
    }

    try {
        if (hour === 0) {
            // Assign a fresh object rather than mutating the stored one in place
            // (presumably needed for the ORM to notice the change — confirm).
            s.cj = Object.assign({}, s.cj, {pg: page});
            await s.save().catch(e => console.error(e));
        }

        const params = {
            ac: 'videolist',
            h: hour,
            pg: page,
        };
        const cjInfo = await requestXML2Json(axios.get(s.api, {params: params, proxy: config.spiderProxy}), parsePageInfo);

        const t = await sequelize.transaction();
        try {
            for (const vod of cjInfo.videos) {
                await storeVideo(s, vod);
            }
        } catch (e) {
            // Never leave the transaction open when storing fails.
            await t.rollback().catch(err => console.error(err));
            throw e;
        }
        await t.commit();
        return cjInfo;
    } catch (e) {
        console.error(e);
        return Promise.reject(e.message);
    }
}

/**
 * Merge a collected playlist into a stored one.
 *
 * Every play source of `mergePL` replaces the source of the same name in
 * `playlist` or is added to it. The result is a new plain object: a missing
 * stored playlist must not become an array, because named keys set on an array
 * are dropped when the value is serialised to JSON. The inputs are not modified.
 *
 * @param playlist stored playlist (source name -> link list), may be null/undefined
 * @param mergePL collected playlist (source name -> link list), may be null/undefined
 * @returns {*} the merged playlist, or `playlist` itself when there is nothing to merge
 */
function mergePlaylist(playlist, mergePL) {
    if (!mergePL || Object.keys(mergePL).length === 0) {
        return playlist;
    }
    return Object.assign({}, playlist, mergePL);
}

module.exports = {
    siteClass: siteClass,
    siteCJTask: siteCJTask,
    cjAllSite: cjAllSite,
};
